Overview

This document integrates analysis of DRIFTS DPT files from OPUSprocessing.Rmd and Leila_code_modified.Rmd.

  1. OPUSprocessing.Rmd “Reading DPT files” section - Ridge plots showing spectral data by crop at different timepoints
  2. Leila_code_modified.Rmd - Bar plots showing functional group proportions

Data Import and Processing

Import DPT Data

This code is modified from code by Stephany Soledad Chacon, “Box> Salk Institute Project> Salk Data> FTIR_Intact_decomposition_pots> FTIR_analysis.Rmd”. Using data in the DPT format, we can extrapolate sample attributes from the file names, then normalize and visualize spectra across replicates, crops, and timepoints. The width of the line bounding the spectra represents the range of the data.

# Check if folder exists and list files
if (!dir.exists(dpt_folder)) {
  stop("Directory does not exist: ", dpt_folder)
}

# List and sort files
cat("Files found in directory:\n")
## Files found in directory:
print(list.files(dpt_folder))
##  [1] "DRIFTS_pot001_13Cwheat_wk0_root_250725.0.dpt"        
##  [2] "DRIFTS_pot012_13Csoy_wk10_root_250827.0.dpt"         
##  [3] "DRIFTS_pot029_13Cwheat_wk10_root_250827.0.dpt"       
##  [4] "DRIFTS_pot029_13Cwheat_wk40_root_250827.3.dpt"       
##  [5] "DRIFTS_pot030_13Csoy_wk10_root_250725.0.dpt"         
##  [6] "DRIFTS_pot030_13Csoy_wk40_root_250827.0.dpt"         
##  [7] "DRIFTS_pot032_13Cwheat_wk0_root_250725.0.dpt"        
##  [8] "DRIFTS_pot047_13Crice_wk0_root_250725.0.dpt"         
##  [9] "DRIFTS_pot050_13Csoy_wk0_root_250827.0.dpt"          
## [10] "DRIFTS_pot052_13CnoPlant_wk30_root_250919.0.dpt"     
## [11] "DRIFTS_pot052_13CnoPlant_wk40_root_250919.0.dpt"     
## [12] "DRIFTS_pot053_13Cwheat_wk10_root_250725.0.dpt"       
## [13] "DRIFTS_pot053_13Cwheat_wk40_root_250919.0.dpt"       
## [14] "DRIFTS_pot055_13Crice_wk10_root_250725.0.dpt"        
## [15] "DRIFTS_pot055_13Crice_wk10_root_qmark_250725.0.dpt"  
## [16] "DRIFTS_pot055_13Crice_wk40_root_250827.0.dpt"        
## [17] "DRIFTS_pot056_12Csoy_wk10_root_250919.0.dpt"         
## [18] "DRIFTS_pot062_13Csoy_wk0_root_250725.0.dpt"          
## [19] "DRIFTS_pot079_13Crice_wk0_root_250725.0.dpt"         
## [20] "DRIFTS_pot080_13Csoy_wk10_root_250725.0.dpt"         
## [21] "DRIFTS_pot080_13Csoy_wk40_root_250919.0.dpt"         
## [22] "DRIFTS_pot086_12Csoy_wk0_root_250919.0.dpt"          
## [23] "DRIFTS_pot087_13Crice_wk10_root_250827.0.dpt"        
## [24] "DRIFTS_pot087_13Crice_wk40_root_250919.0.dpt"        
## [25] "DRIFTS_pot094_13Csoy_wk10_root_250725.0.dpt"         
## [26] "DRIFTS_pot094_13Csoy_wk10_soil_250630.0.dpt"         
## [27] "DRIFTS_pot094_13Csoy_wk40_root_250919.0.dpt"         
## [28] "DRIFTS_pot103_13Crice_wk0_root_recNMR_250725.0.dpt"  
## [29] "DRIFTS_pot103_13Crice_wk0_root_vial1_250725.0.dpt"   
## [30] "DRIFTS_pot103_13Crice_wk0_root_vial2_250725.2.dpt"   
## [31] "DRIFTS_pot103_13Crice_wk0_root_vial3_250725.0.dpt"   
## [32] "DRIFTS_pot107_12Crice_wk0_root_really13_250725.0.dpt"
## [33] "DRIFTS_pot109_13Cwheat_wk40_root_250919.0.dpt"       
## [34] "DRIFTS_pot119_13Crice_wk10_root_250827.0.dpt"        
## [35] "DRIFTS_pot119_13Crice_wk40_root_250919.0.dpt"
DPTfiles <- sort(list.files(dpt_folder, pattern = ".dpt"))
cat("Number of .dpt files:", length(DPTfiles), "\n")
## Number of .dpt files: 35
# Read files individually and combine
dpt_data_list <- list()

for (i in seq_along(DPTfiles)) {
  file_path <- file.path(dpt_folder, DPTfiles[i])
  
  # Read as lines and parse manually (DPT files are space-delimited)
  lines <- readLines(file_path, warn = FALSE)
  lines <- lines[lines != ""]  # Remove empty lines
  
  # Split each line by whitespace
  split_lines <- strsplit(lines, "\\s+")
  
  # Convert to data frame
  temp_data <- tryCatch({
    data.frame(
      wavenumber = as.numeric(sapply(split_lines, `[`, 1)),
      absorbance = as.numeric(sapply(split_lines, `[`, 2)),
      stringsAsFactors = FALSE
    )
  }, error = function(e) NULL)
  
  if (!is.null(temp_data)) {
    # Add filename column
    temp_data$filename <- DPTfiles[i]
    
    # Store in list
    dpt_data_list[[i]] <- temp_data
  }
}

# Remove NULL entries (failed reads)
dpt_data_list <- dpt_data_list[!sapply(dpt_data_list, is.null)]

# Combine all data
dpt_data <- bind_rows(dpt_data_list)

# Remove any rows with NA values
dpt_data <- dpt_data[complete.cases(dpt_data), ]

# Extract metadata from filenames
dpt_data <- dpt_data %>%
  mutate(
    crop = case_when(
      str_detect(filename, "noPlant") ~ "noPlant",
      str_detect(filename, "wheat") ~ "wheat", 
      str_detect(filename, "rice") ~ "rice",
      str_detect(filename, "soy") ~ "soy",
      TRUE ~ NA_character_
    ),
    timepoint = str_extract(filename, "wk\\d+"),
    sample_type = case_when(
      str_detect(filename, "root") ~ "root",
      str_detect(filename, "soil") ~ "soil", 
      TRUE ~ NA_character_
    ),
    # Extract ID (pot number) from filename
    ID = case_when(
      str_detect(filename, "pot[0-9]+") ~ str_extract(filename, "pot[0-9]+") %>% 
                                          str_remove("pot") %>% 
                                          str_pad(width = 3, pad = "0"),
      # Alternative pattern for files without 'pot' prefix
      str_detect(filename, "DRIFTS_[0-9]+") ~ str_extract(filename, "DRIFTS_[0-9]+") %>%
                                              str_remove("DRIFTS_") %>%
                                              str_pad(width = 3, pad = "0"),
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(sample_type)) %>%  # Remove files without clear sample type
  filter(!str_detect(filename, "KBr")) %>%  # Remove KBr files
  # Filter out anomalous spectra
  filter(!filename %in% c("DRIFTS_pot103_13Crice_wk0_root_recNMR_250725.0.dpt",
                          "DRIFTS_pot103_13Crice_wk0_root_vial1_250725.0.dpt", 
                          "DRIFTS_pot079_13Crice_wk0_root_250725.0.dpt",
                          "DRIFTS_pot047_13Crice_wk0_root_250725.0.dpt"))
# save as csv for record-keeping
write.csv(dpt_data, file.path(outputs_folder, "dpt_data.csv"), row.names = FALSE)

# Filter for root samples only
dpt_data_roots <- dpt_data %>% 
  filter(sample_type == "root") %>%
  filter(!is.na(crop))

cat("Root samples by crop:\n")
## Root samples by crop:
print(table(dpt_data_roots$crop))
## 
## noPlant    rice     soy   wheat 
##    5598   27990   30789   19593
cat("Root samples by timepoint:\n") 
## Root samples by timepoint:
print(table(dpt_data_roots$timepoint))
## 
##   wk0  wk10  wk30  wk40 
## 22392 30789  2799 27990

Data Normalization

This code is modified from code by Stephany Soledad Chacon, “Box> Salk Institute Project> Salk Data> FTIR_Intact_decomposition_pots> FTIR_analysis.Rmd”. It creates a new column with normalized absorbance values for each sample, using baseline correction (subtracting the minimum) followed by normalization to the maximum absorbance value.

# Normalize absorbance data by sample (same approach as OPUSprocessing.Rmd)
dpt_data_roots <- dpt_data_roots %>%
  group_by(filename) %>%
  mutate(
    # Baseline correction - subtract minimum
    baseline_corrected = absorbance - min(absorbance, na.rm = TRUE),
    # Normalize to maximum
    normalized_absorbance = baseline_corrected / max(baseline_corrected, na.rm = TRUE)
  ) %>%
  ungroup()

Ridge Plots

Plotting by timepoint and type

These plots are of normalized absorbance values. The thickness of the opaque line represents the range of the data. Annotations indicate integration regions used for calculations of simple plant, complex plant, and microbial organic matter proportions.

Simple plant (aliphatic): 2976 - 2898 cm-1 + 2870 - 2839 cm-1
Microbial: 1660 - 1580 cm-1
Complex plant (aromatic): 1550 - 1500 cm-1
# Generic function to create ridge plots for any timepoint
create_timepoint_ridge_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Roots at week", week_num)
  }
  
  # Determine y-position for annotations based on data
  max_y <- max(filtered_data$normalized_absorbance)*(length(unique(filtered_data$crop))+3)
  
  # Create the plot
  ridge_plot <- filtered_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = crop,
                         height = normalized_absorbance,
                         group = crop,
                         fill = crop,
                         color = crop)) +
    geom_density_ridges(stat = "identity",
                        scale = 3,
                        alpha = 0.25) +
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.3) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.3) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.3) +
    xlim(4000, 400) +
    scale_y_discrete(expand = expansion(mult = c(0.0, 0.56))) +  # Less space below, more above
    theme_classic() +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
       # spacer
    annotate("text", x = 1900, y = max_y + 0.2, label = " ", 
             size = 3, hjust = 0.5) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("text", x = 2560, y = max_y, label = "Simple Plant", 
             size = 3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#a8cbad") +
    annotate("text", x = 1870, y = max_y + 0.11, label = "Microbial", 
             size = 3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#beb5d5") +
    annotate("text", x = 1190, y = max_y + 0.11, label = "Complex Plant", 
             size = 3, hjust = 0.5)
  
  return(list(plot = ridge_plot, data = filtered_data))
}

# Generic function to create sample count tables
create_sample_count_table <- function(filtered_data, timepoint_week) {
  week_num <- str_extract(timepoint_week, "\\d+")
  col_name <- paste("Week", week_num, "samples")
  
  counts <- filtered_data %>%
    select(filename, crop) %>%
    distinct() %>%
    count(crop) %>%
    arrange(match(crop, c("noPlant", "wheat", "rice", "soy"))) %>%
    mutate(!!col_name := paste(n, "&nbsp;&nbsp;", crop)) %>%
    select(all_of(col_name))
  
  return(counts)
}

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_timepoint_ridge_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
3    soy


Week 10 samples
2    wheat
4    rice
5    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



# # Recreate the crop comparison plot showing suberin-indicative peaks (from OPUSprocessing.Rmd)
# # This follows the style of the "Plotting by crop" section
# suberin_peaks_plot <- ggplot(dpt_data_roots, aes(x = wavenumber, y = normalized_absorbance)) +
#   geom_line(aes(color = crop), alpha = 0.7, size = 0.8) +
#   scale_color_manual(values = crop_colors) +
#   xlim(3200, 800) +
#   labs(x = "Wavenumber (cm⁻¹)", y = "Normalized Absorbance", color = "Crop",
#        title = "Suberin-Indicative Peaks by Crop") +
#   theme_classic() +
#   theme(legend.position = "top") +
#   # Highlight suberin-indicative regions (from OPUSprocessing.Rmd analysis)
#   annotate("rect", xmin = 2950, xmax = 2850, ymin = -Inf, ymax = Inf, 
#            alpha = 0.15, fill = "orange") +
#   annotate("rect", xmin = 1740, xmax = 1720, ymin = -Inf, ymax = Inf, 
#            alpha = 0.15, fill = "red") +
#   annotate("rect", xmin = 1630, xmax = 1600, ymin = -Inf, ymax = Inf, 
#            alpha = 0.15, fill = "blue") +
#   annotate("text", x = 2900, y = 0.9, label = "Aliphatic C-H\n(Suberin)", size = 3) +
#   annotate("text", x = 1730, y = 0.9, label = "C=O stretch\n(Suberin)", size = 3) +
#   annotate("text", x = 1615, y = 0.9, label = "Aromatic C=C", size = 3)

# print(suberin_peaks_plot)

Plotting by crop

These plots are of normalized absorbance values. The thickness of the opaque line represents the range of the data. Annotations indicate integration regions used for calculations of simple plant, complex plant, and microbial organic matter proportions.

Simple plant (aliphatic): 2976 - 2898 cm-1 + 2870 - 2839 cm-1
Microbial: 1660 - 1580 cm-1
Complex plant (aromatic): 1550 - 1500 cm-1
# Define crop-specific color palettes
crop_timepoint_colors <- list(
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)

# Generic function to create crop-specific DRIFTS plots
create_crop_plot <- function(data, crop_name, color_palette) {
  # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  crop_plot <- ggplot(crop_data,
                      aes(x = wavenumber,
                          y = normalized_absorbance,
                          color = timepoint)) +
    xlim(4000, 1000) +
    geom_line() +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "Roots DRIFTS")) +
    geom_vline(xintercept = 1500, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1550, linetype = "dotted", linewidth = 0.3) +
    geom_vline(xintercept = 1580, linetype = "dashed", linewidth = 0.3) +
    geom_vline(xintercept = 1660, linetype = "dashed", linewidth = 0.3) +
    geom_vline(xintercept = 2839, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2870, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2898, linetype = "longdash", linewidth = 0.3) +
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.3) +
    # Add simple plant region annotation
    annotate("rect", xmin = 2976, xmax = 2898, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("rect", xmin = 2870, xmax = 2839, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#e3c8b1") +
    annotate("text", x = 2550, y = max(crop_data$normalized_absorbance), label = "Simple Plant", 
             size = 3, hjust = 0.5) +
    # Add microbial region annotation
    annotate("rect", xmin = 1660, xmax = 1580, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#a8cbad") +
    annotate("text", x = 1850, y = max(crop_data$normalized_absorbance), label = "Microbial", 
             size = 3, hjust = 0.5) +
    # Add complex plant region annotation
    annotate("rect", xmin = 1550, xmax = 1500, ymin = -Inf, ymax = Inf, 
             alpha = 0.2, fill = "#beb5d5") +
    annotate("text", x = 1200, y = max(crop_data$normalized_absorbance)+0.02, label = "Complex Plant", 
             size = 3, hjust = 0.5)
  
  return(list(plot = crop_plot, data = crop_data))
}

# Generic function to create crop ID tables
create_crop_id_table <- function(crop_data, crop_name) {
  combinations <- crop_data %>%
    select(ID, timepoint) %>%
    distinct() %>%
    arrange(ID, timepoint) %>%
    group_by(timepoint) %>%
    summarise(pot_ids = {
      ids <- paste(ID, sep = "")
      # Group every 3 items and join with line breaks
      grouped <- split(ids, ceiling(seq_along(ids)/5))
      lines <- sapply(grouped, function(x) paste(x, collapse = ",&ensp;"))
      paste(lines, collapse = "<br>")
    }, .groups = 'drop') %>%
    pivot_wider(names_from = timepoint, values_from = pot_ids)
  
  return(combinations)
}

# Generate plots and tables for each crop
crops <- c("wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062, 086 012, 030, 056, 080, 094 030, 080, 094



Integrate regions of interest

Modified from “Berhe_Ghezzehei_Lab/DRIFTS Data Analysis/Visualizing_FTIR_w_Elemental_Data.R” which starts by calculating aliphatic and aromatic indices, then deriving simple and complex plant and microbial proportions before visualizing everything. This procedure sums non-normalized absorbance values over a specified range, and divides by the total peak area in the ranges of interest to get the proportions.

Spectral Integration Processing

# Function to perform spectral integration on DPT data (based on Leila_code_modified.Rmd)
perform_spectral_integration <- function(dpt_data) {
  cat("Starting spectral integration processing...\n")
  
  # Load required library for integration
  if (!require(pracma, quietly = TRUE)) {
    install.packages("pracma")
    library(pracma)
  }
  
  # Prepare the data in the format needed for integration
  # Rename columns to match expected format
  comb <- dpt_data %>%
    select(filename, wavenumber, absorbance) %>%
    rename(sample = filename, wavelength = wavenumber, abs = absorbance)
  
  # Define spectral windows of interest (same as Leila's code)
  red1 <- comb[comb$wavelength > 2839 & comb$wavelength < 2870, ]  # Aliphatic 1
  red2 <- comb[comb$wavelength > 2898 & comb$wavelength < 2976, ]  # Aliphatic 2  
  red3 <- comb[comb$wavelength > 1580 & comb$wavelength < 1660, ]  # Microbial
  red4 <- comb[comb$wavelength > 1500 & comb$wavelength < 1550, ]  # Complex plant
  
  # Get unique samples
  sample <- unique(comb$sample)
  
  # Create integration results data frame
  ints <- data.frame(
    sample = sample,
    int_2839_2870 = numeric(length(sample)),
    int_2898_2976 = numeric(length(sample)),
    int_1580_1660 = numeric(length(sample)),
    int_1500_1550 = numeric(length(sample)),
    stringsAsFactors = FALSE
  )
  
  # Calculate integrated areas for each spectral window and sample
  cat("Processing", length(sample), "samples for integration...\n")
  for (i in seq_len(nrow(ints))) {
    current_sample <- sample[i]
    
    # Extract data for current sample from each spectral window
    sample_red1 <- red1[red1$sample == current_sample, ]
    sample_red2 <- red2[red2$sample == current_sample, ]
    sample_red3 <- red3[red3$sample == current_sample, ]
    sample_red4 <- red4[red4$sample == current_sample, ]
    
    # Calculate area under curve (integration) using trapezoidal rule
    if (nrow(sample_red1) > 1) {
      ints$int_2839_2870[i] <- pracma::trapz(sample_red1$wavelength, sample_red1$abs)
    }
    
    if (nrow(sample_red2) > 1) {
      ints$int_2898_2976[i] <- pracma::trapz(sample_red2$wavelength, sample_red2$abs)
    }
    
    if (nrow(sample_red3) > 1) {
      ints$int_1580_1660[i] <- pracma::trapz(sample_red3$wavelength, sample_red3$abs)
    }
    
    if (nrow(sample_red4) > 1) {
      ints$int_1500_1550[i] <- pracma::trapz(sample_red4$wavelength, sample_red4$abs)
    }
  }
  
  # Use existing metadata from the input data (no need to re-extract)
  cat("Using existing metadata from DPT data...\n")
  basic_meta <- dpt_data %>%
    select(filename, crop, timepoint, sample_type, ID) %>%
    distinct() %>%
    rename(sample = filename, type = sample_type)
  
  # Combine metadata with integration results
  ftir_data <- merge(basic_meta, ints, by = "sample", all = TRUE)
  
  # Rename integration columns to match expected names
  names(ftir_data)[names(ftir_data) == "int_1580_1660"] <- "int_microbial"
  names(ftir_data)[names(ftir_data) == "int_1500_1550"] <- "int_complex_plant"
  
  # Calculate derived metrics (same as original)
  ftir_data$aliphatic <- ftir_data$int_2839_2870 + ftir_data$int_2898_2976
  ftir_data$total_peak_area <- ftir_data$aliphatic + ftir_data$int_microbial + ftir_data$int_complex_plant
  ftir_data$simple_plant_prop <- ftir_data$aliphatic / ftir_data$total_peak_area
  ftir_data$complex_plant_prop <- ftir_data$int_complex_plant / ftir_data$total_peak_area
  ftir_data$microbial_prop <- ftir_data$int_microbial / ftir_data$total_peak_area
  ftir_data$simple_microbial_prop <- ftir_data$simple_plant_prop / ftir_data$microbial_prop
  ftir_data$complex_microbial_prop <- ftir_data$complex_plant_prop / ftir_data$microbial_prop
  
  # Remove individual aliphatic integration columns (keep only derived metrics)
  ftir_data <- ftir_data %>%
    select(-int_2839_2870, -int_2898_2976)
  
  # Save the processed data to the output directory
  today <- format(Sys.Date(), "%Y-%m-%d")
  output_file <- file.path(outputs_folder, paste0("processed_int_data_", today, ".csv"))
  write.csv(ftir_data, file = output_file, row.names = FALSE)
  cat("Saved processed integration data to:", output_file, "\n")
  
  cat("Integration processing completed successfully.\n")
  return(ftir_data)
}
# Try to find the most recent processed integration data file
ftir_file <- find_most_recent_processed_file(integrated_ftir_data)
## Found 9 processed integration files
## Using most recent file: processed_int_data_2025-10-16.csv
if (!is.null(ftir_file) && file.exists(ftir_file)) {
  ftir_data <- read_csv(ftir_file, show_col_types = FALSE)
  cat("Successfully loaded FTIR integration data from:", basename(ftir_file), "\n")
  cat("Dimensions:", dim(ftir_data), "\n")
  cat("Columns:", names(ftir_data), "\n")
} else {
  cat("No processed integration data found. Processing raw DPT data to generate integration results...\n")
  
  # Check if we have the required dpt_data_roots from previous chunk
  if (!exists("dpt_data_roots") || nrow(dpt_data_roots) == 0) {
    stop("No DPT data available for integration. Please ensure the DPT data import was successful.")
  }
  
  # Process raw DPT data using the integrated function
  ftir_data <- perform_spectral_integration(dpt_data_roots)
  
  # Verify the processing was successful
  if (is.null(ftir_data) || nrow(ftir_data) == 0) {
    stop("Failed to process DPT data for integration.")
  }
  
  cat("Successfully processed", nrow(ftir_data), "samples from DPT data\n")
  cat("Generated columns:", names(ftir_data), "\n")
}
## Successfully loaded FTIR integration data from: processed_int_data_2025-10-16.csv 
## Dimensions: 32 17 
## Columns: source ID isotope crop timepoint type notes run_date int_microbial int_complex_plant aliphatic total_peak_area simple_plant_prop complex_plant_prop microbial_prop simple_microbial_prop complex_microbial_prop
# Calculate summary statistics by group
crop_stats <- ftir_data %>%
  mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy"))) %>%
  group_by(crop) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

timepoint_stats <- ftir_data %>%
  group_by(timepoint) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

crop_timepoint_stats <- ftir_data %>%
  mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy"))) %>%
  group_by(crop, timepoint) %>%
  summarise(
    Mean_simple_plant_prop = mean(simple_plant_prop, na.rm = TRUE),
    SE_simple_plant_prop = sd(simple_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_complex_plant_prop = mean(complex_plant_prop, na.rm = TRUE),
    SE_complex_plant_prop = sd(complex_plant_prop, na.rm = TRUE) / sqrt(n()),
    Mean_microbial_prop = mean(microbial_prop, na.rm = TRUE),
    SE_microbial_prop = sd(microbial_prop, na.rm = TRUE) / sqrt(n()),
    .groups = 'drop'
  )

Visualizing integrated ranges

This code is modified from “Visualizing_FTIR_w_Elemental_Data.R”

# Recreate the EXACT plotting function from Leila_code_modified.Rmd
create_proportion_plots <- function(stats_data, group_col, title_prefix) {
  # Choose colors based on grouping variable
  colors_to_use <- if(group_col == "crop") crop_colors else timepoint_colors
  
  p1 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_simple_plant_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_simple_plant_prop - SE_simple_plant_prop", 
                            ymax = "Mean_simple_plant_prop + SE_simple_plant_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Simple Plant OM"),
         x = str_to_title(group_col),
         y = "Simple Plant OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  p2 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_complex_plant_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_complex_plant_prop - SE_complex_plant_prop", 
                            ymax = "Mean_complex_plant_prop + SE_complex_plant_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Complex Plant OM"),
         x = str_to_title(group_col),
         y = "Complex Plant OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  p3 <- ggplot(data = stats_data, aes_string(x = group_col, y = "Mean_microbial_prop", fill = group_col)) +
    geom_col(alpha = 0.8) +
    geom_errorbar(aes_string(ymin = "Mean_microbial_prop - SE_microbial_prop", 
                            ymax = "Mean_microbial_prop + SE_microbial_prop"),
                  width = 0.2) +
    theme_classic() +
    scale_fill_manual(values = colors_to_use) +
    labs(title = paste0(title_prefix, "Microbial OM"),
         x = str_to_title(group_col),
         y = "Microbial OM Proportion") +
    theme(legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1),
          plot.margin = margin(20, 20, 20, 20),
          axis.title = element_text(size = 12),
          plot.title = element_text(size = 10))

  list(p1 = p1, p2 = p2, p3 = p3)
}

# Create EXACT plots by crop (from Leila_code_modified.Rmd)
crop_plots <- create_proportion_plots(crop_stats, "crop", "")
crop_combined <- plot_grid(crop_plots$p1, crop_plots$p2, crop_plots$p3, nrow = 1, labels = "auto")
print(crop_combined)

# Create EXACT plots by timepoint (from Leila_code_modified.Rmd)
timepoint_plots <- create_proportion_plots(timepoint_stats, "timepoint", "")
timepoint_combined <- plot_grid(timepoint_plots$p1, timepoint_plots$p2, timepoint_plots$p3, nrow = 1, labels = "auto")
print(timepoint_combined)

# Recreate the EXACT interaction plot (crop x timepoint) from Leila_code_modified.Rmd
interaction_plot <- ggplot(crop_timepoint_stats) +
  geom_col(aes(x = crop, y = Mean_simple_plant_prop, fill = timepoint),
           position = "dodge", alpha = 0.8) +
  geom_errorbar(aes(x = crop,
                    ymin = Mean_simple_plant_prop - SE_simple_plant_prop,
                    ymax = Mean_simple_plant_prop + SE_simple_plant_prop,
                    group = timepoint),
                position = position_dodge(0.9), width = 0.2) +
  theme_classic() +
  scale_fill_manual("Timepoint", values = timepoint_colors) +
  labs(title = "Simple Plant OM by Crop and Timepoint",
       x = "Crop Type",
       y = "Simple Plant OM Proportion") +
  theme(legend.position = "top",
        plot.margin = margin(20, 20, 20, 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

print(interaction_plot)

# Recreate EXACT stacked bar plots from Leila_code_modified.Rmd
create_stacked_data <- function(stats_data, group_col) {
  # Reshape data for stacking (exact reproduction)
  long_data <- stats_data %>%
    select(all_of(group_col), Mean_simple_plant_prop, Mean_complex_plant_prop, Mean_microbial_prop) %>%
    pivot_longer(cols = starts_with("Mean_"),
                 names_to = "func_type",
                 values_to = "proportion") %>%
    mutate(func_type = case_when(
      func_type == "Mean_simple_plant_prop" ~ "Simple Plant",
      func_type == "Mean_complex_plant_prop" ~ "Complex Plant",
      func_type == "Mean_microbial_prop" ~ "Microbial"
    )) %>%
    mutate(func_type = factor(func_type, levels = c("Complex Plant", "Simple Plant", "Microbial")))

  long_data
}

# Create EXACT stacked plots
crop_stacked_data <- create_stacked_data(crop_stats, "crop")
crop_stacked_data$crop <- factor(crop_stacked_data$crop, levels = c("noPlant", "wheat", "rice", "soy"))
timepoint_stacked_data <- create_stacked_data(timepoint_stats, "timepoint")

p_crop_stacked <- ggplot(crop_stacked_data, aes(x = crop, y = proportion * 100, fill = func_type)) +
  geom_col(position = "stack") +
  scale_fill_manual(values = c("#93deed", "#d2f0f4", "#1ca5cf"),
                    breaks = c("Microbial", "Simple Plant", "Complex Plant")) +
  theme_classic() +
  labs(x = "Crop Type",
       y = "Functional Group Amount (%)",
       fill = "") +
  theme(legend.position = "top",
        plot.margin = margin(20, 27, 20, 13),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.margin = margin(10, 16, 10, 8))

p_timepoint_stacked <- ggplot(timepoint_stacked_data, aes(x = timepoint, y = proportion * 100, fill = func_type)) +
  geom_col(position = "stack") +
  scale_fill_manual(values = c("#e8aa9c", "#f5d8d1", "#c54e30"),
                    breaks = c("Microbial", "Simple Plant", "Complex Plant")) +
  theme_classic() +
  labs(x = "Timepoint",
       y = "Functional Group Amount (%)",
       fill = "") +
  theme(legend.position = "top",
        plot.margin = margin(20, 27, 20, 13),
        axis.text.x = element_text(angle = 45, hjust = 1),
        legend.margin = margin(10, 16, 10, 8))

combined_stacked <- plot_grid(
  p_crop_stacked,
  p_timepoint_stacked,
  nrow = 1,
  labels = "auto"
)
print(combined_stacked)

Ridge Plots with Suberin Peak Annotations

These ridge plots show the same spectral data as the original Ridge Plots section, but with annotations highlighting suberin-related peaks identified in the literature. The annotations indicate peaks associated with suberin content in plant roots, based on White et al. (2011, 2016).

Aliphatic suberin moiety: 2976, 2930, 2856 cm-1 (within 3000-2800 cm-1 range)
Suberin esters: 1737-1740 cm-1
Aromatic suberin moiety: 1506 cm-1 (also lignin)

White, K. E., Reeves, J. B., & Coale, F. J. (2011). Mid-infrared diffuse reflectance spectroscopy for the rapid analysis of plant root composition. Geoderma, 167–168, 197–203. https://doi.org/10.1016/j.geoderma.2011.08.009
or
White, K. E., Reeves, J. B., & Coale, F. J. (2016). Cell wall compositional changes during incubation of plant roots measured by mid-infrared diffuse reflectance spectroscopy and fiber analysis. Geoderma, 264, 205–213. https://doi.org/10.1016/j.geoderma.2015.10.018

# Generic function to create ridge plots with suberin annotations
create_suberin_ridge_plot <- function(data, timepoint_week, title = NULL) {
  # Filter data for specific timepoint
  filtered_data <- data %>%
    filter(timepoint == timepoint_week) %>%
    mutate(crop = factor(crop, levels = c("noPlant", "wheat", "rice", "soy")))
  
  # Create title if not provided
  if (is.null(title)) {
    week_num <- str_extract(timepoint_week, "\\d+")
    title <- paste("Root suberin at week", week_num)
  }
  
  # Determine y-position for annotations based on data
  max_y <- max(filtered_data$normalized_absorbance)*(length(unique(filtered_data$crop))+3)
  
  # Create the plot
  ridge_plot <- filtered_data %>%
    ggplot(mapping = aes(x = wavenumber,
                         y = crop,
                         height = normalized_absorbance,
                         group = crop,
                         fill = crop,
                         color = crop)) +
    geom_density_ridges(stat = "identity",
                        scale = 3,
                        alpha = 0.25) +
    
    # Suberin peak annotations
    # Aliphatic suberin region (3000-2800 cm-1)
    annotate("rect", xmin = 2800, xmax = 3000, ymin = -Inf, ymax = Inf, 
             alpha = 0.1, fill = "#5076ab") +
    # Specific aliphatic peaks
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    
    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1733, xmax = 1741, ymin = -Inf, ymax = Inf, 
             alpha = 0.15, fill = "#4d653a") +
    geom_vline(xintercept = 1737, linetype = "dashed", linewidth = 0.3, color = "#4d653a") +

    # Aromatic suberin (1506 cm-1)
    geom_vline(xintercept = 1506, linetype = "dotted", linewidth = 0.3, color = "#5f3e38") +
    
    xlim(4000, 400) +
     scale_y_discrete(expand = expansion(mult = c(0.01, 0.56))) +  # Less space below, more above
    theme_classic() +
    scale_fill_manual(values = crop_colors) +
    scale_color_manual(values = crop_colors) +
    ggtitle(title) +
   # spacer
    annotate("text", x = 1900, y = max_y + 0.2, label = " ", 
             size = 3, hjust = 0.5) +
    # Add aliphatic annotation
    annotate("text", x = 2450, y = max_y-0.2, label = "aliphatic suberin", 
             size = 3, hjust = 0.5) +
    # Add esters annotation
    annotate("text", x = 2060, y = max_y, label = "suberin esters", 
             size = 3, hjust = 0.5) +
    # Add aromatic annotation
    annotate("text", x = 1150, y = max_y + 0.1, label = "aromatic suberin", 
             size = 3, hjust = 0.5)

  return(list(plot = ridge_plot, data = filtered_data))
}

# Get unique timepoints and sort them (use dpt_data_roots, not ftir_data)
timepoints <- sort(unique(dpt_data_roots$timepoint))

# Create plots and tables for each timepoint
timepoints <- sort(unique(dpt_data_roots$timepoint))

for (tp in timepoints) {
  # Create plot and get filtered data
  result <- create_suberin_ridge_plot(dpt_data_roots, tp)
  
  # Print the plot
  print(result$plot)
  
  # Create and print sample count table with results='asis'
  counts_table <- create_sample_count_table(result$data, tp)
  cat(knitr::kable(counts_table, format = "html", escape = FALSE))
  cat("<br><br>")  # Add HTML line breaks for spacing
}
Week 0 samples
2    wheat
3    rice
3    soy


Week 10 samples
2    wheat
4    rice
5    soy


Week 30 samples
1    noPlant


Week 40 samples
1    noPlant
3    wheat
3    rice
3    soy



# Define crop-specific color palettes
crop_timepoint_colors <- list(
  noPlant = c("wk0" = "#B68ADE", "wk10" = "#9C6BC7", "wk20" = "#7B41A9",
              "wk30" = "#6f3e94", "wk40" = "#3F1C59"),
  wheat = c("wk0" = "#B4D586", "wk10" = "#98C559", "wk20" = "#79A63A",
            "wk30" = "#58792A", "wk40" = "#374B1B"),
  rice = c("wk0" = "#FDB35E", "wk10" = "#FC9D33", "wk29" = "#F18204",
           "wk40" = "#B56203"),
  soy = c("wk0" = "#FE71B1", "wk10" = "#FE318E", "wk20" = "#F4016E",
          "wk30" = "#B70153", "wk40" = "#7A0137")
)
# Generic function to create crop-specific DRIFTS plots with suberin annotations 
create_crop_plot <- function(data, crop_name, color_palette) {
  # Filter data for specific crop
  crop_data <- data %>%
    group_by(timepoint) %>%
    filter(crop == crop_name)
  
  # Ensure absorbance is numeric
  crop_data$normalized_absorbance <- as.numeric(crop_data$normalized_absorbance)
  
  # Create the plot
  crop_plot <- ggplot(crop_data,
                      aes(x = wavenumber,
                          y = normalized_absorbance,
                          color = timepoint)) +
    xlim(4000, 1000) +
    geom_line() +
    theme_classic() +
    scale_color_manual(values = color_palette) +
    ggtitle(paste(str_to_title(crop_name), "root suberin")) +
    # Suberin peak annotations
    # Aliphatic suberin region (3000-2800 cm-1)
    annotate("rect", xmin = 2800, xmax = 3000, ymin = -Inf, ymax = Inf, 
             alpha = 0.1, fill = "#5076ab") +
    # Specific aliphatic peaks
    geom_vline(xintercept = 2976, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    geom_vline(xintercept = 2930, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    geom_vline(xintercept = 2856, linetype = "longdash", linewidth = 0.3, color = "#3D6AA9") +
    
    # Suberin esters (1737-1740 cm-1)
    annotate("rect", xmin = 1733, xmax = 1741, ymin = -Inf, ymax = Inf, 
             alpha = 0.15, fill = "#4d653a") +
    geom_vline(xintercept = 1737, linetype = "dashed", linewidth = 0.3, color = "#4d653a") +

    # Aromatic suberin (1506 cm-1)
    geom_vline(xintercept = 1506, linetype = "dotted", linewidth = 0.3, color = "#5f3e38") +
    # Add aliphatic annotation
    annotate("text", x = 2500, y = max(crop_data$normalized_absorbance)-0.05, label = "aliphatic suberin", 
             size = 3, hjust = 0.5) +
    # Add esters annotation
    annotate("text", x = 2010, y = max(crop_data$normalized_absorbance), label = "suberin esters", 
             size = 3, hjust = 0.5) +
    # Add aromatic annotation
    annotate("text", x = 1200, y = max(crop_data$normalized_absorbance)+0.02, label = "aromatic suberin", 
             size = 3, hjust = 0.5)

  
  return(list(plot = crop_plot, data = crop_data))
}

# # Generic function to create crop ID tables
# create_crop_id_table <- function(crop_data, crop_name) {
#   combinations <- crop_data %>%
#     select(ID, timepoint) %>%
#     distinct() %>%
#     arrange(ID, timepoint) %>%
#     group_by(timepoint) %>%
#     summarise(pot_ids = {
#       ids <- paste(ID, sep = "")
#       # Group every 3 items and join with line breaks
#       grouped <- split(ids, ceiling(seq_along(ids)/5))
#       lines <- sapply(grouped, function(x) paste(x, collapse = ",&ensp;"))
#       paste(lines, collapse = "<br>")
#     }, .groups = 'drop') %>%
#     pivot_wider(names_from = timepoint, values_from = pot_ids)
  
#   return(combinations)
# }

# Generate plots and tables for each crop
crops <- c("noPlant", "wheat", "rice", "soy")

for (crop in crops) {
  # Create plot and get data
  result <- create_crop_plot(dpt_data_roots, crop, crop_timepoint_colors[[crop]])
  
  # Print the plot
  print(result$plot)
  cat("<br>")  # Add space between plot and table
  
  # Create and print ID combinations table
  id_table <- create_crop_id_table(result$data, crop)
  cat(paste0("<h4>", str_to_title(crop), " pot root samples</h4>"))
  cat(knitr::kable(id_table, format = "html", escape = FALSE, table.attr = 'style="border-collapse: separate; border-spacing: 35px 0;"'))
  cat("<br><br>")  # Add HTML line breaks for spacing
}

Noplant pot root samples

wk30 wk40
052 052



Wheat pot root samples

wk0 wk10 wk40
001, 032 029, 053 029, 053, 109



Rice pot root samples

wk0 wk10 wk40
103, 107 055, 087, 119 055, 087, 119



Soy pot root samples

wk0 wk10 wk40
050, 062, 086 012, 030, 056, 080, 094 030, 080, 094



Summary

# Print summary information
cat("Summary of FTIR Analysis:\n")
## Summary of FTIR Analysis:
cat("Number of samples:", nrow(ftir_data), "\n")
## Number of samples: 32
cat("Crops:", paste(unique(ftir_data$crop), collapse = ", "), "\n")
## Crops: wheat, soy, rice, noPlant
cat("Timepoints:", paste(unique(ftir_data$timepoint), collapse = ", "), "\n")
## Timepoints: wk0, wk10, wk40, wk30
# Display summary statistics
print("Crop Statistics:")
## [1] "Crop Statistics:"
print(crop_stats)
## # A tibble: 4 × 7
##   crop    Mean_simple_plant_prop SE_simple_plant_prop Mean_complex_plant_prop
##   <fct>                    <dbl>                <dbl>                   <dbl>
## 1 noPlant                  0.397              0.0354                    0.205
## 2 wheat                    0.430              0.0103                    0.192
## 3 rice                     0.451              0.00973                   0.182
## 4 soy                      0.368              0.0292                    0.199
## # ℹ 3 more variables: SE_complex_plant_prop <dbl>, Mean_microbial_prop <dbl>,
## #   SE_microbial_prop <dbl>
print("\nTimepoint Statistics:")
## [1] "\nTimepoint Statistics:"
print(timepoint_stats)
## # A tibble: 4 × 7
##   timepoint Mean_simple_plant_prop SE_simple_plant_prop Mean_complex_plant_prop
##   <chr>                      <dbl>                <dbl>                   <dbl>
## 1 wk0                        0.436              0.0204                    0.181
## 2 wk10                       0.398              0.0313                    0.190
## 3 wk30                       0.362             NA                         0.216
## 4 wk40                       0.412              0.00620                   0.203
## # ℹ 3 more variables: SE_complex_plant_prop <dbl>, Mean_microbial_prop <dbl>,
## #   SE_microbial_prop <dbl>